Analysis date: 2021-02-12

Setup

Load libraries

library(plyr)
library(gtools)
library(pheatmap)
library(Matrix)
library(Hmisc)
library(ggpubr)
library(ggbeeswarm)
library(DESeq2)
library(tidyverse)
library(limma)
library(MultiAssayExperiment)
library(gplots)

Load data

source("data/Figure_layouts.R")
load("data/CLL_Proteomics_Setup.RData")
load("data/CLL_Proteomics_ConsensusClustering.RData")
colData(multiomics_MAE)$PG <- as.factor(CCP_group5[rownames(colData(multiomics_MAE))])
colData(multiomics_MAE)$CCP6_RNA <- as.factor(CCP_group6_RNA[rownames(colData(multiomics_MAE))])

Analysis

Associations of protein abundance with genetic alterations

Wilcox test

Function

wilcox_proteins_any <- function(g, alteration, plot=FALSE, output=TRUE){
  if("SNPs" %in% (experiments(multiomics_MAE[alteration,,]) %>% names()) ){
    ty="SNPs_"}else if("chrom_abber" %in% (experiments(multiomics_MAE[alteration,,]) %>% names() ) ){
      ty="chrom_abber_"}else if("health_record_bin" %in% (experiments(multiomics_MAE[alteration,,]) %>% names() ) ){
        ty="health_record_bin_"}
  dat_g <- wideFormat(multiomics_MAE[c(g , alteration),,] ) %>% as_tibble() 
  dat_g <- dat_g %>% mutate(alt =as.logical(get(paste0(ty, alteration), envir=as.environment(dat_g))))
  
  if(grepl("ENSG", g)){
    assay_data="RNAseq_norm_"
    cap <- metadata(multiomics_MAE)$gene_symbol_mapping %>% filter(ensembl_gene_id==g) %>% .$hgnc_symbol
    col_b <- "#92c5de"
    col_r <- "#f4a582"
    cap_y <- paste("Transcript abundance", cap)
    }else{
    assay_data="proteomics_"
    cap <- g
    col_b <- "#0571b0"
    col_r <- "#ca0020"
    cap_y <- paste("Protein abundance", cap)
  }
  
  wt <- dat_g %>% filter(alt==0) %>% .[,paste0(assay_data, g)] %>% unlist
  mut <- dat_g %>% filter(alt==1) %>% .[,paste0(assay_data, g)] %>% unlist
  alt_lab <- alteration
  alt_lab <- gsub("_mutated", "", alt_lab)
  wt_lab <- "wt"
  mut_lab <- "mut"
  if(alt_lab=="IGHV"){
    wt_lab="U-CLL"
  mut_lab="M-CLL"}
  
  wx.test <- wilcox.test(wt, mut)
  if(plot==TRUE){
    p <- dat_g %>% filter(!is.na(alt )) %>% 
      ggplot(aes_string("alt", paste0(assay_data,g), fill="alt" )) + 
      geom_boxplot() + geom_beeswarm() + 
      xlab(alt_lab) +
      scale_x_discrete(labels=c(wt_lab, mut_lab)) +
      ylab(cap_y)+
      pp_sra + 
      scale_fill_manual(values = c(col_b,col_r)) 
      
    print(p+theme(aspect.ratio=2)+ theme(legend.position = 'none') +
      stat_compare_means(method = "wilcox", label = "p.signif", 
                         comparisons = list(c("TRUE","FALSE"))) )
    print(wx.test$p.value)
  }
  if(output==TRUE){return(c(g,  alteration, length(wt), length(mut), wx.test$p.value))
    }else if(all(plot==TRUE, output=="plot")){return(p)}
}
ZAP70_P_plot <- wilcox_proteins_any(g="ZAP70", plot = TRUE, output = "plot", alteration="IGHV_mutated")

## [1] 1.198102e-07
ZAP70_R_plot <- wilcox_proteins_any(g="ENSG00000115085", plot = TRUE, output = "plot", alteration="IGHV_mutated")

## [1] 5.071175e-08
ATM_P_plot <- wilcox_proteins_any(g="ATM", plot = TRUE, output = "plot", alteration="ATM")

## [1] 0.001030721
ATM_R_plot <- wilcox_proteins_any(g="ENSG00000149311", plot = TRUE, output = "plot", alteration="ATM")

## [1] 0.1724928
TP53_P_plot <- wilcox_proteins_any(g="TP53", plot = TRUE, output = "plot", alteration="TP53")

## [1] 0.0001178339
TP53_R_plot <- wilcox_proteins_any(g="ENSG00000141510", plot = TRUE, output = "plot", alteration="TP53")

## [1] 0.005025578
SF3B1_P_plot <- wilcox_proteins_any(g="SF3B1", plot = TRUE, alteration="SF3B1", output = "plot")

## [1] 0.2489914
XPO1_P_plot <- wilcox_proteins_any(g="XPO1", plot = TRUE, output = "plot", alteration="XPO1")

## [1] 0.003788337
XPO1_R_plot <- wilcox_proteins_any(g="ENSG00000082898", plot = TRUE, output = "plot", alteration="XPO1")

## [1] 0.1821531
IGHM_P_plot <- wilcox_proteins_any(g="IGHM", plot = TRUE, alteration="IGHV_mutated", output = "plot")

## [1] 0.00160812

Chromosome position and protein abundance

plot_chromosome_theme <- list(
    coord_cartesian(ylim=c(-0.7,0.7)),
    facet_wrap(~paste("chromosome",chromosome_name), scales = "free_x"),
    ylab("log2 norm. protein abundance"),
    xlab("Location gene for protein on chromosome [bp]")
)
Chr12_P_plot <- proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), !is.na(chrom_abber_trisomy12),
          chromosome_name %in% c("12")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_trisomy12), span=0.5, method = "loess") +
  plot_chromosome_theme +
  pp_sra +
  ggtitle("trisomy12") +
  geom_rect(xmin = 0, ymin=-0.68, ymax=0.68, xmax=133275309, color="gray40", size=1.5,  fill=NA) +
  scale_color_manual(values=c("#0571b0", "#ca0020"), name = "", 
                       labels = c("wt", "trisomy 12" ))
Chr12_P_plot +  theme(aspect.ratio=0.4, legend.position = 'none') 
## `geom_smooth()` using formula 'y ~ x'

Chr11_P_plot <- proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), !is.na(chrom_abber_del11q),
          chromosome_name %in% c("11")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del11q), span=0.5, method = "loess") +
  plot_chromosome_theme +
  pp_sra +
  ggtitle("del11q")+
  geom_rect(xmin = 97400000, ymin=-0.68, ymax=0.68, xmax=110600000, color="gray40", size=1.5,  fill=NA) +
  scale_color_manual(values=c("#0571b0", "#ca0020"), name = "", 
                       labels = c("wt","del11q"))
Chr11_P_plot + theme(aspect.ratio=0.4, legend.position = 'none') 
## `geom_smooth()` using formula 'y ~ x'

Chr13_P_plot <- proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), !is.na(chrom_abber_del13q14),
          chromosome_name %in% c("13")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del13q14), span=0.5, method = "loess") +
  plot_chromosome_theme +
  pp_sra +
  ggtitle("del13q")+
  geom_rect(xmin = 47000000,ymin=-0.68, ymax=0.68, xmax=51000000, color="gray40", size=1.5,  fill=NA) +
  scale_color_manual(values=c("#0571b0", "#ca0020"), name = "", 
                       labels = c("wt","del13q"))
Chr13_P_plot + theme(aspect.ratio=0.4, legend.position = 'none') 
## `geom_smooth()` using formula 'y ~ x'

message("In the region in which the deletion occurs (47Mb-51Mb, 2011 Ouillette et al.) the data are probably very noisy as the number of proteins we detected in the affected region is small:")
## In the region in which the deletion occurs (47Mb-51Mb, 2011 Ouillette et al.) the data are probably very noisy as the number of proteins we detected in the affected region is small:
proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("13"), start_position>47000000, start_position< 51000000) %>% dplyr::select(rowname) %>% unique() %>% nrow()
## [1] 13
Chr17_P_plot <- proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), !is.na(chrom_abber_del17p13 ),
          chromosome_name %in% c("17")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del17p13), span=0.5, method = "loess") +
  plot_chromosome_theme +
  pp_sra +
  ggtitle("del17p")+
  geom_rect(xmin = 0, ymin=-0.68, ymax=0.68, xmax=10800000, color="gray40", size=1.5,  fill=NA) +
  scale_color_manual(values=c("#0571b0", "#ca0020"), name = "", 
                       labels = c("wt","del17p"))
Chr17_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')  
## `geom_smooth()` using formula 'y ~ x'

Chr8_P_plot <- proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), !is.na(chrom_abber_gain8q24 ),
          chromosome_name %in% c("8")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_gain8q24), span=0.5, method = "loess") +
  plot_chromosome_theme +
  pp_sra +
  ggtitle("gain8q24")+
  geom_rect(xmin = 117700001, ymin=-0.68, ymax=0.68, xmax=146364022, color="gray40", size=1.5,  fill=NA) +
  scale_color_manual(values=c("#0571b0", "#ca0020"), name = "", 
                       labels = c("wt","gain8q"))
Chr8_P_plot + theme(aspect.ratio=0.4, legend.position = 'none') 
## `geom_smooth()` using formula 'y ~ x'

Chromosome position and RNA

Creat df

plot_chromosome_theme_RNA <- list(
    #coord_cartesian(ylim=c(0.95,1.1)),
    coord_cartesian(ylim=c(0.95,1.05)),
    facet_wrap(~paste("chromosome",chromosome_name), scales = "free_x"),
    ylab("log10 norm. RNA counts"),
    xlab("Location gene on chromosome [bp]")
)

RNA_biomart<- left_join(
  left_join((longFormat(multiomics_MAE[,,"RNAseq_norm"] ) %>% as_tibble()), 
                                         metadata(multiomics_MAE)$gene_symbol_mapping[c(1,2,3,5:7)], 
                                         by=c("rowname"="ensembl_gene_id")),
  proteomics_tbl_meta_biomart_chrab, by=c("primary"))
## harmonizing input:
##   removing 531 sampleMap rows not in names(experiments)
##   removing 22 colData rownames not in sampleMap 'primary'
RNA_biomart <- left_join( left_join(RNA_biomart,
          proteomics_tbl_meta_biomart_SNP, by=c("primary")),
          proteomics_tbl_meta_biomart_health, by=c("primary"))

Normalise RNA to mean per gene

RNA_biomart_mean <- RNA_biomart %>%
  group_by(rowname) %>%
  summarise( mean_value = mean(value, na.rm = TRUE) )

RNA_biomart <- left_join(
  RNA_biomart,
  RNA_biomart_mean ) %>%
  mutate(value = value/mean_value)
## Joining, by = "rowname"

Plot

# Trisomy 12
Chr12_R_plot <- RNA_biomart %>%
  filter( !is.na(value), !is.na(chrom_abber_trisomy12),
          chromosome_name %in% c("12")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_trisomy12), span=0.5, method = "loess") +
  plot_chromosome_theme_RNA +
  pp_sra +
  ggtitle("trisomy12") +
  geom_rect(xmin = 0, ymin=0.95, ymax=1.05, xmax=133275309, color="gray40", size=1.5,  fill=NA) +    scale_color_manual(values=c("#92c5de", "#f4a582"), name = "", 
                       labels = c("wt","trisomy 12"))
Chr12_R_plot + theme(aspect.ratio=0.4, legend.position = 'none') 
## `geom_smooth()` using formula 'y ~ x'

# del11q
Chr11_R_plot <- RNA_biomart %>%
  filter( !is.na(value), !is.na(chrom_abber_del11q),
          chromosome_name %in% c("11")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del11q), span=0.5, method = "loess") +
  plot_chromosome_theme_RNA +
  pp_sra +
  ggtitle("del11q")+
  geom_rect(xmin = 97400000, ymin=0.95, ymax=1.05, xmax=110600000, color="gray40", size=1.5,  fill=NA) +    
  scale_color_manual(values=c("#92c5de", "#f4a582"), name = "", 
                       labels = c("wt","del11q"))
Chr11_R_plot + theme(aspect.ratio=0.4, legend.position = 'none') 
## `geom_smooth()` using formula 'y ~ x'

# del13q
Chr13_R_plot <- RNA_biomart %>%
  filter( !is.na(value), !is.na( chrom_abber_del13q14),
          chromosome_name %in% c("13")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del13q14), span=0.5, method = "loess") +
  plot_chromosome_theme_RNA +
  pp_sra +
  ggtitle("del13q")+
  geom_rect(xmin = 47000000, ymin=0.95, ymax=1.05, xmax=51000000, color="gray40", size=1.5,  fill=NA) +    
  scale_color_manual(values=c("#92c5de", "#f4a582"), name = "", 
                       labels = c("wt","del13q"))
Chr13_R_plot + theme(aspect.ratio=0.4, legend.position = 'none') 
## `geom_smooth()` using formula 'y ~ x'

# del17p
Chr17_R_plot <- RNA_biomart %>%
  filter( !is.na(value), !is.na(chrom_abber_del17p13),
          chromosome_name %in% c("17")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del17p13), span=0.5, method = "loess") +
  plot_chromosome_theme_RNA +
  pp_sra +
  ggtitle("del17p")+
  geom_rect(xmin = 0, ymin=0.95, ymax=1.05, xmax=10800000, color="gray40", size=1.5,  fill=NA) + 
  scale_color_manual(values=c("#92c5de", "#f4a582"), name = "", 
                       labels = c("wt","del17p"))
Chr17_R_plot + theme(aspect.ratio=0.4, legend.position = 'none') 
## `geom_smooth()` using formula 'y ~ x'

Chr8_R_plot <- RNA_biomart %>%
  filter( !is.na(value), !is.na(chrom_abber_gain8q24),
          chromosome_name %in% c("8")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_gain8q24), span=0.5, method = "loess") +
  plot_chromosome_theme_RNA +
  pp_sra +
  ggtitle("gain8q24")+
  geom_rect(xmin = 117700001, ymin=0.95, ymax=1.05, xmax=146364022, color="gray40", size=1.5,  fill=NA) +    
  scale_color_manual(values=c("#92c5de", "#f4a582"), name = "", 
                       labels = c("wt","gain8q"))
Chr8_R_plot + theme(aspect.ratio=0.4, legend.position = 'none') 
## `geom_smooth()` using formula 'y ~ x'

Save important plots

save(ZAP70_P_plot, ZAP70_R_plot, TP53_P_plot, TP53_R_plot, ATM_P_plot, ATM_R_plot, Chr12_P_plot, Chr17_P_plot, 
     Chr12_R_plot, Chr17_R_plot, SF3B1_P_plot, XPO1_P_plot, XPO1_R_plot, Chr13_P_plot, Chr11_P_plot, 
     Chr13_R_plot, Chr11_R_plot, Chr8_P_plot,Chr8_R_plot, IGHM_P_plot, 
file = "RData_plots/CLL_Proteomics_Genetics_Plots.RData")

Session Info

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] gplots_3.1.1                MultiAssayExperiment_1.14.0
##  [3] limma_3.44.3                forcats_0.5.1              
##  [5] stringr_1.4.0               dplyr_1.0.3                
##  [7] purrr_0.3.4                 readr_1.4.0                
##  [9] tidyr_1.1.2                 tibble_3.0.6               
## [11] tidyverse_1.3.0             DESeq2_1.28.1              
## [13] SummarizedExperiment_1.18.2 DelayedArray_0.14.1        
## [15] matrixStats_0.58.0          Biobase_2.48.0             
## [17] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
## [19] IRanges_2.22.2              S4Vectors_0.26.1           
## [21] BiocGenerics_0.34.0         ggbeeswarm_0.6.0           
## [23] ggpubr_0.4.0                Hmisc_4.4-2                
## [25] ggplot2_3.3.3               Formula_1.2-4              
## [27] survival_3.2-7              lattice_0.20-41            
## [29] Matrix_1.3-2                pheatmap_1.0.12            
## [31] gtools_3.8.2                plyr_1.8.6                 
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1           backports_1.2.1        splines_4.0.2         
##   [4] BiocParallel_1.22.0    digest_0.6.27          htmltools_0.5.1.1     
##   [7] magrittr_2.0.1         checkmate_2.0.0        memoise_2.0.0         
##  [10] cluster_2.1.0          openxlsx_4.2.3         annotate_1.66.0       
##  [13] modelr_0.1.8           jpeg_0.1-8.1           colorspace_2.0-0      
##  [16] blob_1.2.1             rvest_0.3.6            haven_2.3.1           
##  [19] xfun_0.20              crayon_1.4.0           RCurl_1.98-1.2        
##  [22] jsonlite_1.7.2         genefilter_1.70.0      glue_1.4.2            
##  [25] gtable_0.3.0           zlibbioc_1.34.0        XVector_0.28.0        
##  [28] car_3.0-10             abind_1.4-5            scales_1.1.1          
##  [31] DBI_1.1.1              rstatix_0.6.0          Rcpp_1.0.6            
##  [34] xtable_1.8-4           htmlTable_2.1.0        foreign_0.8-81        
##  [37] bit_4.0.4              htmlwidgets_1.5.3      httr_1.4.2            
##  [40] RColorBrewer_1.1-2     ellipsis_0.3.1         pkgconfig_2.0.3       
##  [43] XML_3.99-0.5           farver_2.0.3           nnet_7.3-15           
##  [46] dbplyr_2.0.0           locfit_1.5-9.4         tidyselect_1.1.0      
##  [49] labeling_0.4.2         rlang_0.4.10           AnnotationDbi_1.50.3  
##  [52] munsell_0.5.0          cellranger_1.1.0       tools_4.0.2           
##  [55] cachem_1.0.1           cli_2.3.0              generics_0.1.0        
##  [58] RSQLite_2.2.3          broom_0.7.4            evaluate_0.14         
##  [61] fastmap_1.1.0          yaml_2.2.1             knitr_1.31            
##  [64] bit64_4.0.5            fs_1.5.0               zip_2.1.1             
##  [67] caTools_1.18.1         nlme_3.1-151           xml2_1.3.2            
##  [70] compiler_4.0.2         rstudioapi_0.13        beeswarm_0.2.3        
##  [73] curl_4.3               png_0.1-7              ggsignif_0.6.0        
##  [76] reprex_1.0.0           geneplotter_1.66.0     stringi_1.5.3         
##  [79] highr_0.8              vctrs_0.3.6            pillar_1.4.7          
##  [82] lifecycle_0.2.0        data.table_1.13.6      bitops_1.0-6          
##  [85] R6_2.5.0               latticeExtra_0.6-29    KernSmooth_2.23-18    
##  [88] gridExtra_2.3          rio_0.5.16             vipor_0.4.5           
##  [91] assertthat_0.2.1       withr_2.4.1            GenomeInfoDbData_1.2.3
##  [94] mgcv_1.8-33            hms_1.0.0              grid_4.0.2            
##  [97] rpart_4.1-15           rmarkdown_2.6          carData_3.0-4         
## [100] lubridate_1.7.9.2      base64enc_0.1-3